# Read the 2005 state-level crime-rate table from a CSV in the working directory
file <- "crimerates-by-state-2005.csv"
data <- read.csv(file)
# Show the first rows as a quick check of the import
head(data)
# dplyr supplies filter() and the %>% pipe used in this script
library(dplyr)
# Exclude the District of Columbia row, which this script treats as an outlier
data_filter <- filter(data, state != "District of Columbia")
# Load library for plotting data
library(ggplot2)
# Scatter plot of murder rate vs. burglary rate with a LOESS trend line.
# NOTE(review): data_filter still contains the "United States" row here (it is
# only removed further down for the bubble chart) -- confirm that plotting it
# as a point alongside the states is intended.
ggplot(data_filter, aes(murder, burglary)) +
  geom_point(color = "light blue") +
  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
  # `size` is kept so the script still works on older ggplot2 releases.
  geom_smooth(method = "loess", size = .7, se = FALSE) +
  ggtitle(
    "Murders Vs Burglaries For States in U.S.",
    subtitle = "Higher murder rates are usually associated with higher burglary rates."
  ) +
  labs(
    caption = "Source: Data Collected By Nathan Yau from U.S. Census Bureau",
    x = "Murders per 100,000 residents",
    y = "Burglaries\nper 100,000\nresidents"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(face = "bold", size = 18),
    plot.subtitle = element_text(color = "light gray"),
    plot.caption = element_text(color = "light gray"),
    # Keep the multi-line y-axis title horizontal so it reads left to right
    axis.title.y = element_text(angle = 0)
  ) +
  # Fixed axis ranges; points outside these limits are dropped by ggplot2
  scale_x_continuous(breaks = seq(0, 10, 2), limits = c(0, 10)) +
  scale_y_continuous(breaks = seq(0, 1300, 250), limits = c(0, 1300))

# Remove the "United States" row so only individual states remain
# (District of Columbia was already removed when building data_filter)
data_filter_US <- data_filter %>% filter(state != "United States")
# Bubble chart: murder rate vs. burglary rate, point area scaled by population
ggplot(data_filter_US, aes(murder, burglary)) +
  geom_point(aes(size = population), color = "light blue", alpha = 0.5) +
  # Label only the larger states (population of at least 3,000,000)
  geom_text(
    data = data_filter_US %>% filter(population >= 3000000),
    aes(label = state),
    size = 1.5
  ) +
  ggtitle(
    "Murders Vs Burglaries For States in U.S.",
    subtitle = "Higher murder rates are usually associated with higher burglary rates."
  ) +
  labs(
    caption = "Source: Data Collected By Nathan Yau from U.S. Census Bureau",
    x = "Murders per 100,000 residents",
    y = "Burglaries\nper 100,000\nresidents"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(face = "bold", size = 18),
    plot.subtitle = element_text(color = "light gray"),
    plot.caption = element_text(color = "light gray"),
    # Keep the multi-line y-axis title horizontal so it reads left to right
    axis.title.y = element_text(angle = 0)
  ) +
  # Fixed axis ranges; points outside these limits are dropped by ggplot2
  scale_x_continuous(breaks = seq(0, 10, 2), limits = c(0, 10)) +
  scale_y_continuous(breaks = seq(0, 1300, 250), limits = c(0, 1300)) +
  # The legend title is in millions, so convert the legend labels to match.
  # Assumes `population` holds raw head counts, as the 3000000 label
  # threshold above implies -- TODO confirm against the CSV.
  scale_size(
    range = c(1, 20),
    name = "Population (M)",
    labels = function(pop) pop / 1e6
  )

# Read the birth-rate table from a CSV in the working directory
file_2 <- "birth-rate.csv"
data_2 <- read.csv(file_2)
# Show the first rows as a quick check of the import
head(data_2)
# Density plot of the X2008 column of the birth-rate data
# (presumably the 2008 birth rates, per the plot title -- verify against the CSV)
ggplot(data_2, aes(x = X2008)) +
  # adjust = 1/3 narrows the default smoothing bandwidth for a more detailed curve
  geom_density(color = "blue", fill = "light blue", adjust = 1 / 3) +
  ggtitle("Birthrate Density Plot for 2008") +
  labs(
    caption = "Source: Data Collected By Nathan Yau from World Bank",
    x = "Birth Rate",
    y = "Density"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(face = "bold", size = 18),
    plot.subtitle = element_text(color = "light gray"),
    plot.caption = element_text(color = "light gray"),
    # Keep the y-axis title horizontal so it reads left to right
    axis.title.y = element_text(angle = 0)
  ) +
  # Fixed axis ranges and tick spacing
  scale_x_continuous(breaks = seq(0, 62, 2), limits = c(0, 62)) +
  scale_y_continuous(breaks = seq(0, 0.075, .005), limits = c(0, 0.075))
